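'''
dataSplit

Randomly split the annotated samples found under the VOC2007 ``Annotations``
directory into trainval / train / val / test subsets and write one sample
name per line to ``LS_trainval.txt``, ``LS_train.txt``, ``LS_val.txt`` and
``LS_test.txt`` inside ``./Main``.
'''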
import os
import random
from base import VOC2007_DATASET_PATH as VOC_PATH

# Directory that holds the annotation XML files, and the directory
# (relative to the current working directory) that receives the split lists.
xml_path = VOC_PATH + '/Annotations'
base_path = './Main'

# 1 Sample names
# os.listdir() returns entries in arbitrary order; sorting makes the order of
# the names (and therefore of the lines in the output lists) stable across
# platforms and filesystems.
img_names = sorted(os.listdir(xml_path))
# Keep only annotation files and strip the 4-character '.xml' suffix.
tmp = [fname[:-4] for fname in img_names if fname.endswith('.xml')]

# 2 Dataset split
# trainval_ratio: fraction of all samples assigned to trainval (rest is test).
# train_ratio: fraction of trainval assigned to train (rest is val).
trainval_ratio, train_ratio = 0.9, 0.9
N = len(tmp)
# int() truncates, so both counts are rounded down.
trainval_num = int(N * trainval_ratio)
train_num = int(trainval_num * train_ratio)
# Draw trainval indices from all samples, then train indices from trainval.
trainval_idx = random.sample(range(N), k=trainval_num)
train_idx = random.sample(trainval_idx, k=train_num)

# 3 Output locations for the split lists
# Create the output directory if it is missing so open() does not fail.
os.makedirs(base_path, exist_ok=True)

# Build sets once: membership tests against the sampled lists would rescan
# the whole list for every sample.
trainval_set = set(trainval_idx)
train_set = set(train_idx)

# 4 Write the data
# 5 The `with` statement closes all four files, even if a write raises.
with open(os.path.join(base_path, 'LS_trainval.txt'), 'w') as ftrainval, \
        open(os.path.join(base_path, 'LS_train.txt'), 'w') as ftrain, \
        open(os.path.join(base_path, 'LS_val.txt'), 'w') as fval, \
        open(os.path.join(base_path, 'LS_test.txt'), 'w') as ftest:
    for i, stem in enumerate(tmp):
        name = stem + '\n'
        if i not in trainval_set:
            # Samples outside trainval form the test set.
            ftest.write(name)
            continue
        ftrainval.write(name)
        # Every trainval sample goes to exactly one of train / val.
        if i in train_set:
            ftrain.write(name)
        else:
            fval.write(name)
